import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


# Load the German credit dataset from the local Downloads folder, then
# preview the first rows (DataFrame.head() shows 5 rows by default).
german_credit_df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\jki\Downloads\german_credit_data.csv",
)
german_credit_df.head()


# Show every column label present in the loaded dataset.
column_labels = german_credit_df.columns
print(column_labels)

Index(['Unnamed: 0', 'Age', 'Sex', 'Job', 'Housing', 'Saving accounts',
       'Checking account', 'Credit amount', 'Duration', 'Purpose'],
      dtype='object')


# Print the distinct values of each categorical column so the encoding
# step can map every category (NaN shows up where values are missing).
categorical_columns = (
    "Purpose",
    "Sex",
    "Housing",
    "Saving accounts",
    "Checking account",
)
for column_name in categorical_columns:
    print(f"{column_name} : ", german_credit_df[column_name].unique())

Purpose :  ['radio/TV' 'education' 'furniture/equipment' 'car' 'business'
 'domestic appliances' 'repairs' 'vacation/others']
Sex :  ['male' 'female']
Housing :  ['own' 'free' 'rent']
Saving accounts :  [nan 'little' 'quite rich' 'rich' 'moderate']
Checking account :  ['little' 'moderate' nan 'rich']


# Replace each categorical column with numeric codes.
# Keys are the raw category strings found in the CSV; values are the codes
# that the later plots and models rely on.
category_codes = {
    "Saving accounts": {"little": 0, "moderate": 1, "quite rich": 2, "rich": 3},
    "Checking account": {"little": 0, "moderate": 1, "rich": 2},
    "Sex": {"male": 0, "female": 1},
    "Housing": {"own": 0, "free": 1, "rent": 2},
    "Purpose": {
        "radio/TV": 0,
        "education": 1,
        "furniture/equipment": 2,
        "car": 3,
        "business": 4,
        "domestic appliances": 5,
        "repairs": 6,
        "vacation/others": 7,
    },
}

# These two columns contain NaN in the raw data; their gaps are filled with
# the mean of the observed codes (so imputed rows carry a fractional code).
mean_imputed_columns = ("Saving accounts", "Checking account")

for column_name, code_map in category_codes.items():
    encoded = german_credit_df[column_name].map(code_map)
    if column_name in mean_imputed_columns:
        observed_mean = encoded.dropna().mean()
        encoded = encoded.fillna(observed_mean)
    else:
        encoded = encoded.astype(float)
    german_credit_df[column_name] = encoded

german_credit_df.head(10)


# Scatter of credit amount (x) against applicant age (y).
# The figure is opened *before* plotting: calling plt.figure() after
# plt.scatter() only creates an additional, empty figure.
plt.figure()
plt.scatter(german_credit_df["Credit amount"], german_credit_df["Age"])
plt.xlabel("Credit amount")
plt.ylabel("Age")
plt.show()

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>


# Pairwise scatter/histogram grid over the encoded features.
# "Unnamed: 0" is excluded: in the previews it simply repeats the row
# number, so plotting it only adds meaningless panels to the grid.
# errors="ignore" keeps the cell working if that column is already gone.
pairplot_features = german_credit_df.drop(columns=["Unnamed: 0"], errors="ignore")
sns.pairplot(pairplot_features)

<seaborn.axisgrid.PairGrid at 0x1b118e61650>


# Scatter of credit amount (x) against loan duration (y).
# The figure is opened *before* plotting: calling plt.figure() after
# plt.scatter() only creates an additional, empty figure.
plt.figure()
plt.scatter(german_credit_df["Credit amount"], german_credit_df["Duration"])
plt.xlabel("Credit amount")
plt.ylabel("Duration")
plt.show()

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>


# Scatter of the encoded saving-account level (x) against loan duration (y).
# The figure is opened *before* plotting: calling plt.figure() after
# plt.scatter() only creates an additional, empty figure.
plt.figure()
plt.scatter(german_credit_df["Saving accounts"], german_credit_df["Duration"])
plt.xlabel("Saving accounts")
plt.ylabel("Duration")
plt.show()

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>


# Frequency of each loan purpose (Purpose holds the integer codes 0..7).
# Tick labels are listed in code order: index i labels purpose code i.
purpose_labels = [
    "Radio",
    "education",
    "furniture",
    "car",
    "business",
    "appliances",
    "repairs",
    "vacation",
]

# One bin per code, centred on the integer (edges at -0.5, 0.5, ..., 7.5).
# With bins=8 the bins are 0.875 wide, so bars drift away from labels that
# are placed at the integer positions.
purpose_bin_edges = [code - 0.5 for code in range(len(purpose_labels) + 1)]

# NOTE: `fig` is a matplotlib Axes (what Series.hist returns); the name is
# kept so any later cell that refers to `fig` keeps working.
fig = german_credit_df["Purpose"].hist(bins=purpose_bin_edges)

# Real ticks and axis labels instead of text() at hard-coded data
# coordinates, which breaks as soon as the axis limits change.
fig.set_xticks(range(len(purpose_labels)))
fig.set_xticklabels(purpose_labels, rotation=45, ha="right")
fig.set_ylabel("Frequency")

Text(7, -50, 'vacation')


# Loan-purpose frequencies for mid-sized credits only: 2000 < amount <= 5000.
# Both bounds are applied in a single mask. Previously the lower bound was
# assigned to a misspelled variable (`imitedCredit`) and therefore never
# affected the plotted data.
credit_amount = german_credit_df["Credit amount"]
limitedCredit = german_credit_df[(credit_amount > 2000) & (credit_amount <= 5000)]

# Tick labels are listed in code order: index i labels purpose code i.
purpose_labels = [
    "Radio",
    "education",
    "furniture",
    "car",
    "business",
    "appliances",
    "repairs",
    "vacation",
]

# One bin per purpose code, centred on the integer (edges -0.5 ... 7.5), so
# every bar sits directly above its tick label.
purpose_bin_edges = [code - 0.5 for code in range(len(purpose_labels) + 1)]

# NOTE: `fig` is a matplotlib Axes (what Series.hist returns); the name is
# kept so any later cell that refers to `fig` keeps working.
fig = limitedCredit["Purpose"].hist(bins=purpose_bin_edges)
fig.set_xticks(range(len(purpose_labels)))
fig.set_xticklabels(purpose_labels, rotation=45, ha="right")
fig.set_ylabel("Frequency")

Text(7, -50, 'vacation')


# Age distribution of the applicants, 60 bins.
# NOTE: `fig` is a matplotlib Axes (what Series.hist returns); the name is
# kept so any later cell that refers to `fig` keeps working.
fig = german_credit_df.Age.hist(bins=60)

# Proper axis labels instead of text() at hard-coded data coordinates,
# which end up misplaced whenever the axis limits change.
fig.set_xlabel("Age")
fig.set_ylabel("Frequency")

Text(0, 40, 'Frequency')


# Frequency of each job category.
# Labels are listed in code order; this assumes Job holds the integer codes
# 0..3, matching the positions the labels were originally drawn at.
job_labels = ["UnSkilled", "UnSkilled Resident", "Skilled", "Highly Skilled"]

# One bin per code, centred on the integer (edges -0.5, 0.5, 1.5, 2.5, 3.5).
# The default 10 bins are 0.3 wide and leave the bars off-centre from their
# labels.
job_bin_edges = [code - 0.5 for code in range(len(job_labels) + 1)]

# NOTE: `fig` is a matplotlib Axes (what Series.hist returns); the name is
# kept so any later cell that refers to `fig` keeps working.
fig = german_credit_df["Job"].hist(bins=job_bin_edges)
fig.set_xticks(range(len(job_labels)))
fig.set_xticklabels(job_labels)
fig.set_ylabel("Frequency")

Text(3, -100, 'Highly Skilled')


from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize  # kept in scope for any later cell that uses it
from sklearn.preprocessing import StandardScaler

# Cluster the applicants and project them to 2-D for plotting.
#
# "Unnamed: 0" is dropped first: in the previews it just repeats the row
# number, and as a feature it would pull rows together by file position.
# errors="ignore" keeps the cell working if that column is already gone.
cluster_features = german_credit_df.drop(columns=["Unnamed: 0"], errors="ignore")

# Standardise every feature (zero mean, unit variance per column) so that
# large-valued columns such as "Credit amount" do not dominate the Euclidean
# distances KMeans uses. normalize() rescales each *row* to unit length,
# which does not put the columns on a common scale.
X_norm = StandardScaler().fit_transform(cluster_features)

# KMeans and PCA now work on the same matrix, so the cluster colours shown
# on the PCA scatter correspond to the space the clusters were found in.
# n_clusters=8 and n_init=10 spell out the defaults that were in effect
# (setting n_init also silences the FutureWarning); random_state makes the
# cluster labels reproducible between runs.
y = KMeans(n_clusters=8, n_init=10, random_state=0).fit_predict(X_norm)

# The second positional argument of fit_transform is the ignored `y`, not
# the number of components, so the stray `2` is removed.
y_PCA = PCA(n_components=2).fit_transform(X_norm)
y_PCA.shape

C:\Users\jki\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)

(1000, 2)


# First plot: clusters in the original feature space (credit amount vs age),
# one colour per cluster label in `y`.
amount_values = german_credit_df["Credit amount"]
age_values = german_credit_df["Age"]
plt.scatter(amount_values, age_values, c=y)

# Second plot, on its own figure: the same cluster colours over the two
# PCA components.
plt.figure()
first_component = y_PCA[:, 0]
second_component = y_PCA[:, 1]
plt.scatter(first_component, second_component, c=y)

<matplotlib.collections.PathCollection at 0x1b1247c90d0>

	Unnamed: 0	Age	Sex	Job	Housing	Saving accounts	Checking account	Credit amount	Duration	Purpose
0	0	67	0.0	2	0.0	0.456548	0.000000	1169	6	0.0
1	1	22	1.0	2	0.0	0.000000	1.000000	5951	48	0.0
2	2	49	0.0	1	0.0	0.000000	0.651815	2096	12	1.0
3	3	45	0.0	2	1.0	0.000000	0.000000	7882	42	2.0
4	4	53	0.0	2	1.0	0.000000	0.000000	4870	24	3.0
5	5	35	0.0	1	1.0	0.456548	0.651815	9055	36	1.0
6	6	53	0.0	2	0.0	2.000000	0.651815	2835	24	2.0
7	7	35	0.0	3	2.0	0.000000	1.000000	6948	36	3.0
8	8	61	0.0	1	0.0	3.000000	0.651815	3059	12	0.0
9	9	28	0.0	3	0.0	0.000000	1.000000	5234	30	3.0

Return Home

German Credit Data Analysis (Python)

Problem

1. Determine the optimum age to target for customers

2. Determine the type of loan that attracts the most clients

Result:

	Unnamed: 0	Age	Sex	Job	Housing	Saving accounts	Checking account	Credit amount	Duration	Purpose
0	0	67	male	2	own	NaN	little	1169	6	radio/TV
1	1	22	female	2	own	little	moderate	5951	48	radio/TV
2	2	49	male	1	own	little	NaN	2096	12	education
3	3	45	male	2	free	little	little	7882	42	furniture/equipment
4	4	53	male	2	free	little	little	4870	24	car